* Start from an empty dataset in memory.
clear 

* Set the working directory; the relative "data.xlsx" paths used by the
* import commands in this script are resolved against it.
* NOTE(review): absolute, user-specific path - edit before running on another machine.
cd "/Users/jingruj3/Library/CloudStorage/Box-Box/LLM and Behavioral Econ"

* Baseline runs (GPT-4, Claude, Gemini, in that order).
* For each model: load its "baseline <model>" sheet from data.xlsx, keep only
* rows whose Feature value is not missing, and print summary statistics for
* the sigma, alpha and lambda columns.
foreach model in gpt claude gemini {
    import excel using "data.xlsx", sheet("baseline `model'") firstrow clear
    keep if Feature != .
    summarize sigma alpha lambda
}

* ---------------------------------------------------------------------------
* GPT-4 with demographic features
* Loads the "feature gpt" sheet, builds coarse age / education groups and
* regresses sigma, alpha and lambda on the demographic factors.
* Estimates are stored as sigma_gpt_1, alpha_gpt_1 and lambda_gpt_1.
* ---------------------------------------------------------------------------
import excel "data.xlsx", sheet("feature gpt") firstrow clear
drop if Feature == .
summarize sigma alpha lambda

* Encode categorical variables.
* encode assigns integer codes 1, 2, ... to the string values in sorted order,
* so the ib#. base levels used in the regressions below depend on which
* categories are present in the sheet - verify with -label list- if the data change.
encode Gender, gen(gender_n)
encode Education, gen(Education_n)
encode MaritalStatus, gen(MaritalStatus_n)
encode Location, gen(Location_n)
encode Age, gen(age_n)

* Create group variables for age and education.
* The catch-all middle category is only assigned when the source value is
* non-blank: rows with a blank Age / Education keep an empty group string,
* which encode converts to missing, instead of being silently counted in the
* middle (reference) category.
gen age_group = "<25 Years Old" if Age == "15 to 24"
replace age_group = ">55 Years Old" if Age == "55 to 64" | Age == "Over 65"
replace age_group = "26 to 54" if age_group == "" & Age != ""

gen edu_group = "Low Education Level" if Education == "Below lower secondary" | Education == "Lower secondary"
replace edu_group = "High Education Level" if Education == "Graduate"
replace edu_group = "middle" if edu_group == "" & Education != ""

encode age_group, gen(age_group_n)
encode edu_group, gen(edu_group_n)

* Run regressions for GPT-4 with demographic features.
* Same right-hand side for every outcome; i.age_group_n uses the lowest code
* as base, the ib#. terms pin the base level explicitly.
* NOTE(review): ib3.edu_group_n is "middle" only if all three education groups
* occur in the data - confirm.
foreach outcome in sigma alpha lambda {
    reg `outcome' i.age_group_n ib2.gender_n ib3.edu_group_n ib3.MaritalStatus_n ib2.Location_n
    est store `outcome'_gpt_1
}

* ---------------------------------------------------------------------------
* Gemini with demographic features (without country distribution)
* Loads the "feature-gemini" sheet, builds coarse age / education groups and
* regresses sigma, alpha and lambda on the demographic factors.
* Estimates are stored as sigma_gemini_1, alpha_gemini_1 and lambda_gemini_1.
* NOTE(review): this sheet name uses a hyphen while the GPT and Claude sheets
* use a space ("feature gpt", "feature claude") - confirm it matches the workbook.
* ---------------------------------------------------------------------------
import excel "data.xlsx", sheet("feature-gemini") firstrow clear

* Every other section of this script drops rows with a missing Feature before
* summarizing; apply the same cleaning here so the Gemini statistics are
* computed on comparable rows. The guard keeps the script running if this
* sheet has no numeric Feature column.
capture confirm numeric variable Feature
if _rc == 0 {
    drop if Feature == .
}
summarize sigma alpha lambda

* Encode categorical variables.
* encode assigns integer codes 1, 2, ... to the string values in sorted order,
* so the ib#. base levels used in the regressions below depend on which
* categories are present in the sheet - verify with -label list- if the data change.
encode Gender, gen(gender_n)
encode Education, gen(Education_n)
encode MaritalStatus, gen(MaritalStatus_n)
encode Location, gen(Location_n)
encode Age, gen(age_n)

* Create group variables for age and education.
* The catch-all middle category is only assigned when the source value is
* non-blank: rows with a blank Age / Education keep an empty group string,
* which encode converts to missing, instead of being silently counted in the
* middle (reference) category.
gen age_group = "<25 Years Old" if Age == "15 to 24"
replace age_group = ">55 Years Old" if Age == "55 to 64" | Age == "Over 65"
replace age_group = "26 to 54" if age_group == "" & Age != ""

gen edu_group = "Low Education Level" if Education == "Below lower secondary" | Education == "Lower secondary"
replace edu_group = "High Education Level" if Education == "Graduate"
replace edu_group = "middle" if edu_group == "" & Education != ""

encode age_group, gen(age_group_n)
encode edu_group, gen(edu_group_n)

* Run regressions for Gemini with demographic features.
* Same right-hand side for every outcome; i.age_group_n uses the lowest code
* as base, the ib#. terms pin the base level explicitly.
* NOTE(review): ib3.edu_group_n is "middle" only if all three education groups
* occur in the data - confirm.
foreach outcome in sigma alpha lambda {
    reg `outcome' i.age_group_n ib2.gender_n ib3.edu_group_n ib3.MaritalStatus_n ib2.Location_n
    est store `outcome'_gemini_1
}

* ---------------------------------------------------------------------------
* Claude with demographic features (without country distribution)
* Loads the "feature claude" sheet, builds coarse age / education groups and
* regresses sigma, alpha and lambda on the demographic factors.
* Estimates are stored as sigma_claude_1, alpha_claude_1 and lambda_claude_1.
* ---------------------------------------------------------------------------
import excel "data.xlsx", sheet("feature claude") firstrow clear
drop if Feature == .
summarize sigma alpha lambda

* Encode categorical variables.
* encode assigns integer codes 1, 2, ... to the string values in sorted order,
* so the ib#. base levels used in the regressions below depend on which
* categories are present in the sheet - verify with -label list- if the data change.
encode Gender, gen(gender_n)
encode Education, gen(Education_n)
encode MaritalStatus, gen(MaritalStatus_n)
encode Location, gen(Location_n)
encode Age, gen(age_n)

* Create group variables for age and education.
* The catch-all middle category is only assigned when the source value is
* non-blank: rows with a blank Age / Education keep an empty group string,
* which encode converts to missing, instead of being silently counted in the
* middle (reference) category.
gen age_group = "<25 Years Old" if Age == "15 to 24"
replace age_group = ">55 Years Old" if Age == "55 to 64" | Age == "Over 65"
replace age_group = "26 to 54" if age_group == "" & Age != ""

gen edu_group = "Low Education Level" if Education == "Below lower secondary" | Education == "Lower secondary"
replace edu_group = "High Education Level" if Education == "Graduate"
replace edu_group = "middle" if edu_group == "" & Education != ""

encode age_group, gen(age_group_n)
encode edu_group, gen(edu_group_n)

* Run regressions for Claude with demographic features.
* Same right-hand side for every outcome; i.age_group_n uses the lowest code
* as base, the ib#. terms pin the base level explicitly.
* NOTE(review): ib3.edu_group_n is "middle" only if all three education groups
* occur in the data - confirm.
foreach outcome in sigma alpha lambda {
    reg `outcome' i.age_group_n ib2.gender_n ib3.edu_group_n ib3.MaritalStatus_n ib2.Location_n
    est store `outcome'_claude_1
}
